#### Open R from the command terminal with an enlarged pointer-protection stack (--max-ppsize):
R --max-ppsize 500000



> library(MLSeq)
> library(dplyr)
> library("S4Vectors")
> library("e1071")
### Load the count table. read.table() takes column names from the first line and,
### with no `sep` given, splits fields on whitespace.
### NOTE(review): if Input.csv is really comma-separated, add sep = "," -- confirm against the file.
> data <- read.table("Input.csv", header = TRUE)
### Class labels, one per column of `data` in column order (they are indexed with the same
### column indices when the data are partitioned): four conditions, 96 columns each.
### For Mouse: class <- DataFrame(condition = factor(rep(c("2i", "mES"), c(96, 96))))
> class <- DataFrame(condition = factor(rep(c("JK1", "Jurkat", "BJ", "K562"), c(96, 96, 96, 96))))
### Fail early if the hard-coded label count does not match the number of data columns.
> stopifnot(nrow(class) == ncol(data))
> library(DESeq2)
## Partition the data into a 70% training set and a 30% test set (columns are split, rows are kept).
## Fix the RNG seed first so the same columns are drawn on every run and results are reproducible.
> set.seed(2128)
> nTest <- ceiling(ncol(data) * 0.3)
> ind <- sample(ncol(data), size = nTest, replace = FALSE)
## Print the test-set size and the selected column indices for the record.
> nTest
> ind
## 1 is added to every count before conversion to a matrix
## (presumably a pseudo-count to avoid zeros -- TODO confirm).
> data.train <- as.matrix(data[, -ind] + 1)
> data.test <- as.matrix(data[, ind] + 1)
## Split the class labels with the same indices so each label stays aligned with its column.
> classtr <- DataFrame(condition = class[-ind, ])
> classts <- DataFrame(condition = class[ind, ])
## Wrap counts and labels as DESeqDataSet objects; these are what classify() and predict() receive.
> data.trainS4 <- DESeqDataSetFromMatrix(countData = data.train, colData = classtr, design = ~ condition)
> data.testS4 <- DESeqDataSetFromMatrix(countData = data.test, colData = classts, design = ~ condition)

### Training using several methods:

### SVM (method "svmRadial") with "deseq-vst" pre-processing.
### Resampling: 5-fold cross-validation repeated 10 times, class probabilities requested.
> ctrl.svm <- trainControl(method = "repeatedcv", number = 5, repeats = 10, classProbs = TRUE)
> fit.svm <- classify(data = data.trainS4, method = "svmRadial", preProcessing = "deseq-vst", ref = "BJ", tuneLength = 10, control = ctrl.svm)
### Inspect the fit: object summary, training results, and plot.
> show(fit.svm)
> trained(fit.svm)
> plot(fit.svm)

### PLDA with "deseq" normalisation.
### Resampling: 5-fold cross-validation, a single repeat, tuneLength of 10.
> ctrl.plda <- discreteControl(method = "repeatedcv", repeats = 1, number = 5, tuneLength = 10)
> fit.plda <- classify(data.trainS4, method = "PLDA", ref = "BJ", normalize = "deseq", control = ctrl.plda)
### Show the training results of the fitted PLDA model.
> trained(fit.plda)

### voomDLDA with "deseq" normalisation.
### Resampling: 5-fold cross-validation, a single repeat, tuneLength of 10.
> ctrl.voomDLDA <- voomControl(method = "repeatedcv", number = 5, repeats = 1, tuneLength = 10)
> fit.voomDLDA <- classify(data = data.trainS4, method = "voomDLDA", normalize = "deseq", ref = "BJ", control = ctrl.voomDLDA)
### Inspect the voomDLDA fit itself (the original line displayed fit.plda here, a copy-paste slip).
> trained(fit.voomDLDA)

### Random forest (method "rf") with "deseq-vst" pre-processing.
### Resampling: 5-fold cross-validation repeated 10 times, class probabilities requested.
> ctrl.rf <- trainControl(method = "repeatedcv", number = 5, repeats = 10, classProbs = TRUE)
> fit.rf <- classify(data = data.trainS4, method = "rf", preProcessing = "deseq-vst", ref = "BJ", tuneLength = 10, control = ctrl.rf)

### Plot each fitted model to compare the methods.
### (R is case-sensitive: the PLDA fit is stored as fit.plda; there is no object named fit.PLDA.)
> plot(fit.svm)
> plot(fit.plda)
> plot(fit.voomDLDA)
> plot(fit.rf)

## Once the most accurate training method has been chosen (random forest is used as the example here),
## predict the held-out test set and compare the predictions with the true labels.

## True labels of the test columns, with "BJ" as the first (reference) level.
> actual <- relevel(classts$condition, ref = "BJ")
## Predicted labels from the random forest fit, re-levelled the same way.
> pred.rf <- relevel(predict(fit.rf, data.testS4), ref = "BJ")
## Cross-tabulate predicted vs. actual and summarise it as a confusion matrix.
> tbl <- table(Predicted = pred.rf, Actual = actual)
> cm <- confusionMatrix(tbl, positive = "BJ")
> print(cm)
